In the previous notebook, we filtered and normalized the expression matrix. In this notebook, we aim to annotate all cells to their corresponding cell type. Furthermore, we will try to identify cycling cells.
library(scater)
library(scran)
library(Seurat)
library(ggpubr)
library(kBET)
library(cluster)
library(AnnotationDbi)
library(org.Hs.eg.db)
library(viridis)
library(tidyverse)
source("bin/utils.R")
t_act_l <- readRDS("results/R_objects/t_act_Seurat_list_filtered_normalized.rds")
Note that this analysis was done with the first 2 donors (replicates). We will analyse the remaining two (rep2) in a subsequent notebook:
t_act_l <- t_act_l[c("day_0_rep1", "day_2_rep1")]
names(t_act_l) <- c("day_0", "day_2")
To cluster our cells, we need to overcome 2 challenges:
Thus, we aim to eliminate redundancy and denoise the dataset. Hence, we will find the subset of genes that drive most of the variability in the expression matrix (feature selection). We are going to analyze each day and donor separately, as we need as much resolution as possible to detect the markers.
t_act_0 <- SplitObject(t_act_l$day_0, split.by = "donor")
t_act_2 <- SplitObject(t_act_l$day_2, split.by = "donor")
t_act_l <- list(t_act_0$Male, t_act_0$Female, t_act_2$Male, t_act_2$Female)
names(t_act_l) <- c("0M", "0F", "2M", "2F")
t_act_l <- purrr::map(t_act_l, FindVariableFeatures)
var_plots <- purrr::map(t_act_l, VariableFeaturePlot)
var_plots <- purrr::map2(t_act_l, var_plots, function(seurat, p) {
LabelPoints(
plot = p,
points = head(VariableFeatures(seurat), 10),
repel = TRUE
)
})
var_plots
## $`0M`
##
## $`0F`
##
## $`2M`
##
## $`2F`
As we can see, we reduce the number of dimensions from >10,000 genes to ~2000 highly variable genes (HVG). We observe that among the top HVG there are well-known PBMC markers, such as LYZ, S100A8 (monocytes), GNLY and NKG7 (natural killer and CD8+ T cells). Moreover, we see that IFNG is amongst the most variable genes in day 2, which is consistent with the fact that the T cells in this dataset were activated with anti-CD3 antibodies.
An important pre-processing step in any cluster analysis is to scale the data, as otherwise variables with a higher mean will have a higher weight in the distance metric:
t_act_l <- purrr::map(t_act_l, ScaleData)
An additional challenge in our cluster analysis is that scRNAs-seq is very noisy (very susceptible to technical artifacts), and very sparse (contains drop-outs). Thus, differences in single genes may not be accurate to identify cell types. To that end, we can perform PCA, as each PC can be conceived as a ‘metagene’ that includes information across a correlated gene set. Furthermore, we will reduce the dimensionality even more:
t_act_l <- purrr::map(t_act_l, RunPCA)
purrr::map(t_act_l, VizDimLoadings, dims = 1:3, reduction = "pca")
## $`0M`
##
## $`0F`
##
## $`2M`
##
## $`2F`
purrr::map(t_act_l, DimPlot, reduction = "pca")
## $`0M`
##
## $`0F`
##
## $`2M`
##
## $`2F`
To cluster cells we will used the Seurat’s built-in functions FindNeighbors and FindClusters, which use the graph-based Louvain algorithm. The critical parameter for these functions is the resolution, which will determine the final number of clusters. To decide it, we will compute the silhouette width for varying resolutions, and choose the one that maximizes it.
t_act_l <- purrr::map(t_act_l, FindNeighbors, dims = 1:20)
resolutions <- c(
0.005,
seq(0.01, 0.1, by = 0.01),
seq(0.1, 1, by = 0.1), seq(1, 10, by = 1)
)
avgs_sil_width_dfs <- purrr::map2(t_act_l, names(t_act_l), function(seurat, condition) {
print(condition)
avgs_sil_width_df <- data.frame(num_k = c(), sil_width = c(), resolution = c())
num_k <- 1
for (res in resolutions) {
print(str_c("Current resolution is ", res))
seurat <- FindClusters(seurat, resolution = res, verbose = FALSE)
curr_num_k <- length(levels(seurat$seurat_clusters))
if (curr_num_k == num_k) {
next
} else {
sil_width <- calc_sil_width(
object = seurat,
clusters = seurat$seurat_clusters,
npcs = 5,
ncell = 2500
)
print(str_c("Current silhouette width is ", sil_width))
curr_df <- data.frame(num_k = curr_num_k, sil_width = sil_width, resolution = res)
avgs_sil_width_df <- rbind(avgs_sil_width_df, curr_df)
num_k <- curr_num_k
print(str_c("Current number of clusters is ", num_k))
}
}
avgs_sil_width_df
})
## [1] "0M"
## [1] "Current resolution is 0.005"
## [1] "Current silhouette width is -0.00990317498698955"
## [1] "Current number of clusters is 4"
## [1] "Current resolution is 0.01"
## [1] "Current resolution is 0.02"
## [1] "Current resolution is 0.03"
## [1] "Current resolution is 0.04"
## [1] "Current resolution is 0.05"
## [1] "Current resolution is 0.06"
## [1] "Current silhouette width is -0.0705609609545215"
## [1] "Current number of clusters is 5"
## [1] "Current resolution is 0.07"
## [1] "Current resolution is 0.08"
## [1] "Current resolution is 0.09"
## [1] "Current silhouette width is -0.0926675502184887"
## [1] "Current number of clusters is 6"
## [1] "Current resolution is 0.1"
## [1] "Current resolution is 0.1"
## [1] "Current resolution is 0.2"
## [1] "Current silhouette width is -0.106183361595093"
## [1] "Current number of clusters is 9"
## [1] "Current resolution is 0.3"
## [1] "Current silhouette width is -0.0560642220878356"
## [1] "Current number of clusters is 10"
## [1] "Current resolution is 0.4"
## [1] "Current resolution is 0.5"
## [1] "Current silhouette width is -0.0607314587528802"
## [1] "Current number of clusters is 11"
## [1] "Current resolution is 0.6"
## [1] "Current resolution is 0.7"
## [1] "Current resolution is 0.8"
## [1] "Current resolution is 0.9"
## [1] "Current silhouette width is -0.0788722605822153"
## [1] "Current number of clusters is 14"
## [1] "Current resolution is 1"
## [1] "Current resolution is 1"
## [1] "Current resolution is 2"
## [1] "Current silhouette width is -0.13573366505029"
## [1] "Current number of clusters is 24"
## [1] "Current resolution is 3"
## [1] "Current silhouette width is -0.126700752962485"
## [1] "Current number of clusters is 28"
## [1] "Current resolution is 4"
## [1] "Current silhouette width is -0.147970248428551"
## [1] "Current number of clusters is 33"
## [1] "Current resolution is 5"
## [1] "Current silhouette width is -0.270915774589304"
## [1] "Current number of clusters is 41"
## [1] "Current resolution is 6"
## [1] "Current silhouette width is -0.212956421522404"
## [1] "Current number of clusters is 46"
## [1] "Current resolution is 7"
## [1] "Current silhouette width is -0.308938565096131"
## [1] "Current number of clusters is 55"
## [1] "Current resolution is 8"
## [1] "Current silhouette width is -0.372686532944749"
## [1] "Current number of clusters is 61"
## [1] "Current resolution is 9"
## [1] "Current silhouette width is -0.312004197758309"
## [1] "Current number of clusters is 65"
## [1] "Current resolution is 10"
## [1] "Current silhouette width is -0.366741757046182"
## [1] "Current number of clusters is 67"
## [1] "0F"
## [1] "Current resolution is 0.005"
## [1] "Current silhouette width is -0.198368446740192"
## [1] "Current number of clusters is 4"
## [1] "Current resolution is 0.01"
## [1] "Current resolution is 0.02"
## [1] "Current resolution is 0.03"
## [1] "Current resolution is 0.04"
## [1] "Current resolution is 0.05"
## [1] "Current resolution is 0.06"
## [1] "Current resolution is 0.07"
## [1] "Current silhouette width is -0.250966030668032"
## [1] "Current number of clusters is 5"
## [1] "Current resolution is 0.08"
## [1] "Current silhouette width is -0.231349839779394"
## [1] "Current number of clusters is 6"
## [1] "Current resolution is 0.09"
## [1] "Current resolution is 0.1"
## [1] "Current resolution is 0.1"
## [1] "Current resolution is 0.2"
## [1] "Current silhouette width is -0.225690886650997"
## [1] "Current number of clusters is 8"
## [1] "Current resolution is 0.3"
## [1] "Current silhouette width is -0.0805477795435761"
## [1] "Current number of clusters is 10"
## [1] "Current resolution is 0.4"
## [1] "Current silhouette width is -0.245845298727118"
## [1] "Current number of clusters is 11"
## [1] "Current resolution is 0.5"
## [1] "Current silhouette width is -0.209231139505976"
## [1] "Current number of clusters is 13"
## [1] "Current resolution is 0.6"
## [1] "Current resolution is 0.7"
## [1] "Current silhouette width is -0.0944739055016626"
## [1] "Current number of clusters is 15"
## [1] "Current resolution is 0.8"
## [1] "Current resolution is 0.9"
## [1] "Current resolution is 1"
## [1] "Current resolution is 1"
## [1] "Current resolution is 2"
## [1] "Current silhouette width is -0.116676659786663"
## [1] "Current number of clusters is 22"
## [1] "Current resolution is 3"
## [1] "Current silhouette width is -0.125209706717626"
## [1] "Current number of clusters is 28"
## [1] "Current resolution is 4"
## [1] "Current silhouette width is -0.251949052251048"
## [1] "Current number of clusters is 34"
## [1] "Current resolution is 5"
## [1] "Current silhouette width is -0.192768769309449"
## [1] "Current number of clusters is 40"
## [1] "Current resolution is 6"
## [1] "Current silhouette width is -0.347549239779038"
## [1] "Current number of clusters is 45"
## [1] "Current resolution is 7"
## [1] "Current silhouette width is -0.28004542953883"
## [1] "Current number of clusters is 53"
## [1] "Current resolution is 8"
## [1] "Current silhouette width is -0.222344288810828"
## [1] "Current number of clusters is 58"
## [1] "Current resolution is 9"
## [1] "Current silhouette width is -0.295608672860346"
## [1] "Current number of clusters is 64"
## [1] "Current resolution is 10"
## [1] "Current silhouette width is -0.330103497513141"
## [1] "Current number of clusters is 69"
## [1] "2M"
## [1] "Current resolution is 0.005"
## [1] "Current silhouette width is -0.00649730398099191"
## [1] "Current number of clusters is 2"
## [1] "Current resolution is 0.01"
## [1] "Current resolution is 0.02"
## [1] "Current resolution is 0.03"
## [1] "Current resolution is 0.04"
## [1] "Current silhouette width is -0.010702258369204"
## [1] "Current number of clusters is 3"
## [1] "Current resolution is 0.05"
## [1] "Current resolution is 0.06"
## [1] "Current resolution is 0.07"
## [1] "Current resolution is 0.08"
## [1] "Current resolution is 0.09"
## [1] "Current silhouette width is -0.0172514257714861"
## [1] "Current number of clusters is 4"
## [1] "Current resolution is 0.1"
## [1] "Current resolution is 0.1"
## [1] "Current resolution is 0.2"
## [1] "Current resolution is 0.3"
## [1] "Current silhouette width is -0.0182525510068341"
## [1] "Current number of clusters is 5"
## [1] "Current resolution is 0.4"
## [1] "Current resolution is 0.5"
## [1] "Current silhouette width is -0.0319931395710725"
## [1] "Current number of clusters is 7"
## [1] "Current resolution is 0.6"
## [1] "Current resolution is 0.7"
## [1] "Current silhouette width is -0.0302992944167953"
## [1] "Current number of clusters is 9"
## [1] "Current resolution is 0.8"
## [1] "Current silhouette width is -0.0330197371340559"
## [1] "Current number of clusters is 10"
## [1] "Current resolution is 0.9"
## [1] "Current silhouette width is -0.0459159879805063"
## [1] "Current number of clusters is 11"
## [1] "Current resolution is 1"
## [1] "Current silhouette width is -0.0793688966633591"
## [1] "Current number of clusters is 12"
## [1] "Current resolution is 1"
## [1] "Current resolution is 2"
## [1] "Current silhouette width is -0.0685155550174229"
## [1] "Current number of clusters is 19"
## [1] "Current resolution is 3"
## [1] "Current silhouette width is -0.102223660052064"
## [1] "Current number of clusters is 24"
## [1] "Current resolution is 4"
## [1] "Current silhouette width is -0.103833034157114"
## [1] "Current number of clusters is 31"
## [1] "Current resolution is 5"
## [1] "Current silhouette width is -0.131616296340424"
## [1] "Current number of clusters is 37"
## [1] "Current resolution is 6"
## [1] "Current silhouette width is -0.115185684494556"
## [1] "Current number of clusters is 41"
## [1] "Current resolution is 7"
## [1] "Current silhouette width is -0.162701225248331"
## [1] "Current number of clusters is 44"
## [1] "Current resolution is 8"
## [1] "Current silhouette width is -0.149213578032777"
## [1] "Current number of clusters is 47"
## [1] "Current resolution is 9"
## [1] "Current silhouette width is -0.128743489150844"
## [1] "Current number of clusters is 51"
## [1] "Current resolution is 10"
## [1] "Current silhouette width is -0.176211926577168"
## [1] "Current number of clusters is 57"
## [1] "2F"
## [1] "Current resolution is 0.005"
## [1] "Current resolution is 0.01"
## [1] "Current silhouette width is -0.0314677648448414"
## [1] "Current number of clusters is 2"
## [1] "Current resolution is 0.02"
## [1] "Current resolution is 0.03"
## [1] "Current resolution is 0.04"
## [1] "Current resolution is 0.05"
## [1] "Current resolution is 0.06"
## [1] "Current silhouette width is -0.026454858473299"
## [1] "Current number of clusters is 3"
## [1] "Current resolution is 0.07"
## [1] "Current silhouette width is -0.0571885733605773"
## [1] "Current number of clusters is 4"
## [1] "Current resolution is 0.08"
## [1] "Current resolution is 0.09"
## [1] "Current resolution is 0.1"
## [1] "Current resolution is 0.1"
## [1] "Current resolution is 0.2"
## [1] "Current silhouette width is -0.0216034933152717"
## [1] "Current number of clusters is 5"
## [1] "Current resolution is 0.3"
## [1] "Current resolution is 0.4"
## [1] "Current silhouette width is -0.0249616244456837"
## [1] "Current number of clusters is 7"
## [1] "Current resolution is 0.5"
## [1] "Current resolution is 0.6"
## [1] "Current silhouette width is -0.0310750357643001"
## [1] "Current number of clusters is 9"
## [1] "Current resolution is 0.7"
## [1] "Current silhouette width is -0.0322967352797902"
## [1] "Current number of clusters is 10"
## [1] "Current resolution is 0.8"
## [1] "Current resolution is 0.9"
## [1] "Current resolution is 1"
## [1] "Current resolution is 1"
## [1] "Current resolution is 2"
## [1] "Current silhouette width is -0.0568674780657195"
## [1] "Current number of clusters is 14"
## [1] "Current resolution is 3"
## [1] "Current silhouette width is -0.066423304195434"
## [1] "Current number of clusters is 21"
## [1] "Current resolution is 4"
## [1] "Current silhouette width is -0.130084136666141"
## [1] "Current number of clusters is 27"
## [1] "Current resolution is 5"
## [1] "Current silhouette width is -0.0949918438495278"
## [1] "Current number of clusters is 31"
## [1] "Current resolution is 6"
## [1] "Current silhouette width is -0.129340508084619"
## [1] "Current number of clusters is 35"
## [1] "Current resolution is 7"
## [1] "Current silhouette width is -0.162644225840953"
## [1] "Current number of clusters is 40"
## [1] "Current resolution is 8"
## [1] "Current silhouette width is -0.156170082988737"
## [1] "Current number of clusters is 47"
## [1] "Current resolution is 9"
## [1] "Current silhouette width is -0.156943365462931"
## [1] "Current number of clusters is 52"
## [1] "Current resolution is 10"
## [1] "Current silhouette width is -0.246543682845399"
## [1] "Current number of clusters is 54"
resolution_plots <- purrr::map2(avgs_sil_width_dfs, names(avgs_sil_width_dfs), function(df, cond) {
ggplot(df, aes(num_k, sil_width, label = num_k)) +
geom_text() +
labs(title = cond,
x = "Number of clusters (k)",
y = "Average Silhouette Width") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
})
resolution_plots
## $`0M`
##
## $`0F`
##
## $`2M`
##
## $`2F`
We aim to determine a number of clusters that maximizes silhouette width while preventing overstratification. Judgding by the previous plots, we define the following k:
optimal_k <- c(6, 6, 4, 3)
t_act_l <- purrr::pmap(list(t_act_l, avgs_sil_width_dfs, optimal_k), function(seurat, df, opt_k) {
opt_resolution <- df[df$num_k == opt_k, "resolution"]
seurat <- FindClusters(object = seurat, resolution = opt_resolution)
seurat
})
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6092
## Number of edges: 245369
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9629
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6751
## Number of edges: 271189
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9625
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3218
## Number of edges: 123564
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9486
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2705
## Number of edges: 108671
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9500
## Number of communities: 3
## Elapsed time: 0 seconds
purrr::map(t_act_l, ~ length(levels(.x$seurat_clusters)))
## $`0M`
## [1] 6
##
## $`0F`
## [1] 6
##
## $`2M`
## [1] 4
##
## $`2F`
## [1] 3
We can visualize the former clusters with a t-Stochastic Neighbor Embedding (tSNE) and uniform manifold approximation and projection (UMAP), which allow to depict more structure in the data than PCA:
t_act_l <- purrr::map(t_act_l, function(seurat) {
seurat@reductions$PCA <- NULL
seurat@reductions$TSNE <- NULL
seurat@reductions$UMAP <- NULL
seurat
})
t_act_l <- purrr::map(t_act_l, RunTSNE, reduction = "pca", dims = 1:20)
t_act_l <- purrr::map(t_act_l, RunUMAP, reduction = "pca", dims = 1:20)
purrr::map(t_act_l, DimPlot, reduction = "tsne")
## $`0M`
##
## $`0F`
##
## $`2M`
##
## $`2F`
purrr::map(t_act_l, DimPlot, reduction = "umap")
## $`0M`
##
## $`0F`
##
## $`2M`
##
## $`2F`
# saveRDS(t_act_l, file = "results/R_objects/t_act_Seurat_list_pre_processed.rds")
# t_act_l <- readRDS("results/R_objects/t_act_Seurat_list_pre_processed.rds")
Let us find the markers for each of the clusters. That is, the genes that are exclusively expressed in one cluster:
markers_l <- purrr::map(t_act_l, FindAllMarkers, only.pos = TRUE)
DT::datatable(markers_l$`0M`)
DT::datatable(markers_l$`0F`)
DT::datatable(markers_l$`2M`)
DT::datatable(markers_l$`2F`)
# saveRDS(markers_l, file = "results/R_objects/t_act_markers_list.rds")
marker_selection <- purrr::map2(t_act_l, markers_l, function(seurat, df) {
num_k <- length(unique(seurat$seurat_clusters))
selection <- unlist(purrr::map(0:(num_k-1), ~df[df$cluster == .x, "gene"][0:8]))
selection
})
heatmaps_markers <- purrr::map2(t_act_l, marker_selection, ~DoHeatmap(.x, features = .y) + NoLegend())
heatmaps_markers
## $`0M`
##
## $`0F`
##
## $`2M`
##
## $`2F`
After inspecting the markers, there are still some clusters of unknown identity. Let us shed more light by detecting the cycling cells in our datasets using cell cycle signatures:
t_act_l <- purrr::map(t_act_l, function(seurat) {
s_genes <- cc.genes$s.genes[cc.genes$s.genes %in% rownames(seurat@assays$RNA@scale.data)]
g2m_genes <- cc.genes$g2m.genes[cc.genes$g2m.genes %in% rownames(seurat@assays$RNA@scale.data)]
seurat <- CellCycleScoring(object = seurat, s.features = s_genes, g2m.features = g2m_genes)
seurat
})
cycling_gg <- purrr::map(t_act_l, FeaturePlot, features = c("S.Score", "G2M.Score"), cols = viridis(20))
cycling_gg
## $`0M`
##
## $`0F`
##
## $`2M`
##
## $`2F`
With the previous results, we can proceed to annotate the cells to its corresponding cell type
| Cluster ID | Cell type |
|---|---|
| 0 | CD4 T |
| 1 | Monocytes |
| 2 | Cytotoxic |
| 3 | B |
| 4 | FCGR3A Monocytes |
| 5 | Dendritic Cells |
FeaturePlot(t_act_l$`0M`, features = "IL7R")
t_act_l$`0M`$cell_type <- case_when(
t_act_l$`0M`$seurat_clusters == "0" ~ "CD4 T",
t_act_l$`0M`$seurat_clusters == "1" ~ "Monocyte",
t_act_l$`0M`$seurat_clusters == "2" ~ "Cytotoxic",
t_act_l$`0M`$seurat_clusters == "3" ~ "B",
t_act_l$`0M`$seurat_clusters == "4" ~ "FCGR3A Monocyte",
t_act_l$`0M`$seurat_clusters == "5" ~ "Dendritic Cell"
)
Idents(t_act_l$`0M`) <- "cell_type"
DimPlot(t_act_l$`0M`, reduction = "umap", label = TRUE) + NoLegend()
| Cluster ID | Cell type |
|---|---|
| 0 | CD4 T |
| 1 | Cytotoxic |
| 2 | Monocyte |
| 3 | B |
| 4 | Monocyte |
| 5 | Unknown |
FeaturePlot(t_act_l$`0F`, features = "IL7R")
t_act_l$`0F`$cell_type <- case_when(
t_act_l$`0F`$seurat_clusters == "0" ~ "CD4 T",
t_act_l$`0F`$seurat_clusters == "1" ~ "Cytotoxic",
t_act_l$`0F`$seurat_clusters == "2" ~ "Monocyte",
t_act_l$`0F`$seurat_clusters == "3" ~ "B",
t_act_l$`0F`$seurat_clusters == "4" ~ "Monocyte",
t_act_l$`0F`$seurat_clusters == "5" ~ "Unknown"
)
Idents(t_act_l$`0F`) <- "cell_type"
DimPlot(t_act_l$`0F`, reduction = "umap", label = TRUE) + NoLegend()
| Cluster ID | Cell type |
|---|---|
| 0 | Cycling |
| 1 | CD4 T |
| 2 | Cytotoxic |
| 3 | B |
t_act_l$`2M`$cell_type <- case_when(
t_act_l$`2M`$seurat_clusters == "0" ~ "Cycling",
t_act_l$`2M`$seurat_clusters == "1" ~ "CD4 T",
t_act_l$`2M`$seurat_clusters == "2" ~ "Cytotoxic",
t_act_l$`2M`$seurat_clusters == "3" ~ "B"
)
Idents(t_act_l$`2M`) <- "cell_type"
DimPlot(t_act_l$`2M`, reduction = "umap", label = TRUE) + NoLegend()
| Cluster ID | Cell type |
|---|---|
| 0 | T |
| 1 | Cycling |
| 2 | B |
t_act_l$`2F`$cell_type <- case_when(
t_act_l$`2F`$seurat_clusters == "0" ~ "T",
t_act_l$`2F`$seurat_clusters == "1" ~ "Cycling",
t_act_l$`2F`$seurat_clusters == "2" ~ "B"
)
Idents(t_act_l$`2F`) <- "cell_type"
DimPlot(t_act_l$`2F`, reduction = "umap", label = TRUE) + NoLegend()
# saveRDS(t_act_l, file = "results/R_objects/t_act_Seurat_list_annotated")
# t_act <- SplitObject(t_act, split.by = "donor")
#
# # Male
# t_act_m <- pre_process_seurat(t_act$Male)
# Idents(t_act_m) <- "day"
# umap_day <- DimPlot(t_act_m, reduction = "umap", pt.size = 0.9)
# Idents(t_act_m) <- "time"
# umap_time <- DimPlot(t_act_m, reduction = "umap", pt.size = 0.9)
# ggarrange(plotlist = list(umap_day, umap_time))
# monocyte_male <- FeaturePlot(t_act_m, features = "LYZ", reduction = "umap", pt.size = 0.9)
#
# # Female
# t_act_f <- pre_process_seurat(t_act$Female)
# Idents(t_act_f) <- "day"
# umap_day <- DimPlot(t_act_f, reduction = "umap", pt.size = 0.9)
# Idents(t_act_f) <- "time"
# umap_time <- DimPlot(t_act_f, reduction = "umap", pt.size = 0.9)
# ggarrange(plotlist = list(umap_day, umap_time))
# monocyte_female <- FeaturePlot(t_act_f, features = "LYZ", reduction = "umap", pt.size = 0.9)
# monocyte_female
#
#
# # score with stimulated-T cells signature
# t_act_f <- AddModuleScore(t_act_f, features = list(t_stimuli_sign), name = "stimulated_t_score")
# FeaturePlot(t_act_f, features = "stimulated_t_score1", reduction = "umap", pt.size = 0.9)
#
# FeaturePlot(t_act_f, features = "TRAC", reduction = "umap", pt.size = 0.9)
EXPLORATORY2:
# Split by day and donor: Day 0 - Male Donor
# seurat_l <- SplitObject(t_act$day_0, split.by = "donor")
# seurat <- seurat_l$Male
# seurat <- pre_process_seurat(seurat)
# seurat <- FindVariableFeatures(seurat)
# seurat <- ScaleData(seurat)
# seurat <- RunPCA(seurat)
# seurat <- RunUMAP(seurat, dims = 1:15, reduction = "pca")
# seurat@reductions$UMAP <- NULL
# Idents(seurat) <- "time"
# DimPlot(seurat, reduction = "umap", pt.size = 0.8)
# seurat <- FindNeighbors(seurat)
# seurat <- FindClusters(seurat, dims = 1:20, reduction = "pca", resolution = 0.1)
# DimPlot(seurat, reduction = "umap", pt.size = 0.8)
# markers <- FindAllMarkers(seurat)
# marker_selection <- unlist(map(0:5, ~markers[markers$cluster == ., "gene"][0:5]))
# marker_selection
# DoHeatmap(seurat, features = marker_selection) + NoLegend()
# FeaturePlot(seurat, features = c("IL7R", "GNLY", "LYZ", "MS4A1", "FCGR3A"), reduction = "umap")
#
#
# # Split by day and donor: Day 2 - Male Donor
# seurat_l <- SplitObject(t_act$day_2, split.by = "donor")
# seurat <- seurat_l$Male
# seurat <- pre_process_seurat(seurat)
# seurat@reductions$UMAP <- NULL
# Idents(seurat) <- "time"
# DimPlot(seurat, reduction = "umap", pt.size = 0.8)
# seurat <- FindNeighbors(seurat)
# seurat <- FindClusters(seurat, dims = 1:20, reduction = "pca", resolution = 0.1)
# DimPlot(seurat, reduction = "umap", pt.size = 0.8)
# markers <- FindAllMarkers(seurat)
# marker_selection <- unlist(map(0:4, ~markers[markers$cluster == ., "gene"][0:10]))
# marker_selection
# DoHeatmap(seurat, features = marker_selection) + NoLegend()
# FeaturePlot(seurat, features = c("IL7R", "GNLY", "MS4A1"), reduction = "umap")
#
# seurat <- subset(seurat, subset = seurat_clusters != "2")
# seurat <- CellCycleScoring(seurat, s.features = cc.genes$s.genes, g2m.features = cc.genes$g2m.genes)
# FeaturePlot(seurat, features = c("S.Score", "G2M.Score"))
# seurat <- pre_process_seurat(seurat, vars_to_regress = c("S.Score", "G2M.Score"))
# DimPlot(seurat, reduction = "umap", pt.size = 0.8)
# markers2 <- FindAllMarkers(seurat)
# marker_selection2 <- unlist(map(c(0,1,3,4), ~markers2[markers2$cluster == ., "gene"][0:8]))
# marker_selection2
# DoHeatmap(seurat, features = marker_selection) + NoLegend()
# FeaturePlot(seurat, features = c("IL7R", "GNLY", "MS4A1", "IFNG"), reduction = "umap")
# Idents(seurat) <- "seurat_clusters"
# DimPlot(seurat, reduction = "umap", pt.size = 0.8)
#
# b <- subset(seurat, idents = "4")
# b <- pre_process_seurat(b)
# Idents(b) <- "time"
# DimPlot(b, reduction = "umap", pt.size = 0.8)
# b <- FindNeighbors(b)
# b <- FindClusters(b, resolution = 0.2)
# DimPlot(b, reduction = "umap", pt.size = 0.8)
# markers_b <- FindAllMarkers(b)